In [ ]:
# <H1>Diabetes Dataset Analysis: Data Preprocessing, Visualization, and Feature Engineering</H1>
In [2]:
# Installing the libraries
!pip install featuretools
!pip install tidypy
!pip install --upgrade autoviz
!pip install seaborn missingno
!pip install --upgrade pandas
!pip install sweetviz
!pip install --upgrade jinja2
!pip install pylint==2.11.1
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: featuretools in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (1.31.0)
Requirement already satisfied: cloudpickle>=1.5.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (3.0.0)
Requirement already satisfied: holidays>=0.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (0.57)
Requirement already satisfied: numpy>=1.25.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (1.26.3)
Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (24.1)
Collecting pandas>=2.0.0 (from featuretools)
Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Requirement already satisfied: psutil>=5.7.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (6.0.0)
Requirement already satisfied: scipy>=1.10.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (1.13.1)
Requirement already satisfied: tqdm>=4.66.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (4.66.5)
Requirement already satisfied: woodwork>=0.28.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from featuretools) (0.31.0)
Requirement already satisfied: python-dateutil in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from holidays>=0.17->featuretools) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=2.0.0->featuretools) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=2.0.0->featuretools) (2024.1)
Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tqdm>=4.66.3->featuretools) (0.4.6)
Requirement already satisfied: scikit-learn>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from woodwork>=0.28.0->featuretools) (1.3.0)
Requirement already satisfied: importlib-resources>=5.10.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from woodwork>=0.28.0->featuretools) (6.4.5)
Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil->holidays>=0.17->featuretools) (1.16.0)
Requirement already satisfied: joblib>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.1.0->woodwork>=0.28.0->featuretools) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.1.0->woodwork>=0.28.0->featuretools) (3.5.0)
Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl (11.6 MB)
---------------------------------------- 0.0/11.6 MB ? eta -:--:--
-- ------------------------------------- 0.8/11.6 MB 6.7 MB/s eta 0:00:02
---------------------- ----------------- 6.6/11.6 MB 23.7 MB/s eta 0:00:01
---------------------------------------- 11.6/11.6 MB 27.9 MB/s eta 0:00:00
Installing collected packages: pandas
Attempting uninstall: pandas
Found existing installation: pandas 1.5.3
Uninstalling pandas-1.5.3:
Successfully uninstalled pandas-1.5.3
Successfully installed pandas-2.2.3
WARNING: Failed to remove contents in a temporary directory 'C:\Users\manis\anaconda3\envs\CAIAssignment21\Lib\site-packages\~~ndas'. You can safely remove it manually. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. autoviz 0.1.905 requires xgboost<1.7,>=0.82, but you have xgboost 2.1.1 which is incompatible. dataprep 0.4.5 requires jinja2<3.1,>=3.0, but you have jinja2 3.1.4 which is incompatible. dataprep 0.4.5 requires pandas<2.0,>=1.1, but you have pandas 2.2.3 which is incompatible. holoviews 1.19.1 requires bokeh>=3.1, but you have bokeh 2.4.3 which is incompatible. panel 1.5.0 requires bokeh<3.6.0,>=3.5.0, but you have bokeh 2.4.3 which is incompatible.
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: tidypy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.22.0)
Requirement already satisfied: bandit<2,>=1.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (1.7.9)
Requirement already satisfied: basicserial<2.0,>=0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (1.2.1)
Requirement already satisfied: check-manifest<0.48,>=0.42 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.47)
Requirement already satisfied: click<9,>=6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (8.1.7)
Requirement already satisfied: demjson3<4,>3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (3.0.6)
Requirement already satisfied: dennis<1,>=0.9 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.9)
Requirement already satisfied: detect-secrets<2,>=1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (1.5.0)
Requirement already satisfied: dlint<0.13,>=0.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.12.0)
Requirement already satisfied: eradicate<3,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.3.0)
Requirement already satisfied: mccabe<0.7,>=0.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.6.1)
Requirement already satisfied: pep8-naming<0.13,>=0.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.12.1)
Requirement already satisfied: pycodestyle<2.9,>=2.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.8.0)
Requirement already satisfied: pydiatra<0.13,>=0.12.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.12.7)
Requirement already satisfied: pydocstyle<7,>=4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (6.3.0)
Requirement already satisfied: pyflakes<2.5,>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.4.0)
Requirement already satisfied: pylint<2.12,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.11.1)
Requirement already satisfied: pyroma<4,>=2.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (3.3)
Requirement already satisfied: pyyaml<7,>=3.12 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (6.0.1)
Requirement already satisfied: requests<3,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.32.3)
Requirement already satisfied: restructuredtext-lint<2,>=1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (1.4.0)
Requirement already satisfied: toml<0.11,>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (0.10.2)
Requirement already satisfied: tqdm<5,>=4.11 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (4.66.5)
Requirement already satisfied: vulture<3,>=2.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (2.12)
Requirement already satisfied: yamllint<2,>=1.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tidypy) (1.35.1)
Requirement already satisfied: stevedore>=1.20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bandit<2,>=1.4->tidypy) (5.3.0)
Requirement already satisfied: rich in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bandit<2,>=1.4->tidypy) (13.8.1)
Requirement already satisfied: colorama>=0.3.9 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bandit<2,>=1.4->tidypy) (0.4.6)
Requirement already satisfied: iso8601 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from basicserial<2.0,>=0.1->tidypy) (2.1.0)
Requirement already satisfied: build>=0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from check-manifest<0.48,>=0.42->tidypy) (1.2.2)
Requirement already satisfied: setuptools in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from check-manifest<0.48,>=0.42->tidypy) (72.1.0)
Requirement already satisfied: polib>=1.0.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from dennis<1,>=0.9->tidypy) (1.2.0)
Requirement already satisfied: flake8<5.0.0,>=3.6.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from dlint<0.13,>=0.8->tidypy) (4.0.1)
Requirement already satisfied: flake8-polyfill<2,>=1.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pep8-naming<0.13,>=0.4->tidypy) (1.0.2)
Requirement already satisfied: snowballstemmer>=2.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pydocstyle<7,>=4->tidypy) (2.2.0)
Requirement already satisfied: platformdirs>=2.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint<2.12,>=2->tidypy) (3.10.0)
Requirement already satisfied: astroid<2.9,>=2.8.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint<2.12,>=2->tidypy) (2.8.6)
Requirement already satisfied: isort<6,>=4.2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint<2.12,>=2->tidypy) (5.13.2)
Requirement already satisfied: docutils in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pyroma<4,>=2.2->tidypy) (0.20.1)
Requirement already satisfied: pygments in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pyroma<4,>=2.2->tidypy) (2.18.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3,>=2->tidypy) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3,>=2->tidypy) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3,>=2->tidypy) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3,>=2->tidypy) (2024.8.30)
Requirement already satisfied: tomli>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from vulture<3,>=2.2->tidypy) (2.0.1)
Requirement already satisfied: pathspec>=0.5.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from yamllint<2,>=1.8->tidypy) (0.12.1)
Requirement already satisfied: lazy-object-proxy>=1.4.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from astroid<2.9,>=2.8.0->pylint<2.12,>=2->tidypy) (1.10.0)
Requirement already satisfied: wrapt<1.14,>=1.11 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from astroid<2.9,>=2.8.0->pylint<2.12,>=2->tidypy) (1.13.3)
Requirement already satisfied: packaging>=19.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from build>=0.1->check-manifest<0.48,>=0.42->tidypy) (24.1)
Requirement already satisfied: pyproject_hooks in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from build>=0.1->check-manifest<0.48,>=0.42->tidypy) (1.1.0)
Requirement already satisfied: pbr>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from stevedore>=1.20.0->bandit<2,>=1.4->tidypy) (6.1.0)
Requirement already satisfied: markdown-it-py>=2.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from rich->bandit<2,>=1.4->tidypy) (3.0.0)
Requirement already satisfied: mdurl~=0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from markdown-it-py>=2.2.0->rich->bandit<2,>=1.4->tidypy) (0.1.2)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: autoviz in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.1.905)
Requirement already satisfied: xlrd in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (2.0.1)
Requirement already satisfied: wordcloud in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.9.3)
Requirement already satisfied: emoji in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (2.12.1)
Requirement already satisfied: pyamg in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (5.2.1)
Requirement already satisfied: scikit-learn in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.3.0)
Requirement already satisfied: statsmodels in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (0.14.3)
Requirement already satisfied: nltk in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (3.9.1)
Requirement already satisfied: textblob in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (0.18.0.post0)
Collecting xgboost<1.7,>=0.82 (from autoviz)
Downloading xgboost-1.6.2-py3-none-win_amd64.whl.metadata (1.8 kB)
Requirement already satisfied: fsspec>=0.8.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (2024.2.0)
Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (4.12.2)
Requirement already satisfied: pandas-dq>=1.29 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.29)
Requirement already satisfied: numpy>=1.24.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.26.3)
Requirement already satisfied: hvplot>=0.9.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (0.10.0)
Requirement already satisfied: holoviews>=1.16.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.19.1)
Requirement already satisfied: panel>=1.4.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (1.5.0)
Requirement already satisfied: pandas>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (2.2.3)
Requirement already satisfied: matplotlib>3.7.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (3.9.2)
Requirement already satisfied: seaborn>0.12.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from autoviz) (0.13.2)
Collecting bokeh>=3.1 (from holoviews>=1.16.0->autoviz)
Downloading bokeh-3.5.2-py3-none-any.whl.metadata (12 kB)
Requirement already satisfied: colorcet in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from holoviews>=1.16.0->autoviz) (3.1.0)
Requirement already satisfied: packaging in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from holoviews>=1.16.0->autoviz) (24.1)
Requirement already satisfied: param<3.0,>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from holoviews>=1.16.0->autoviz) (2.1.1)
Requirement already satisfied: pyviz-comms>=2.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from holoviews>=1.16.0->autoviz) (3.0.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (1.3.0)
Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (4.53.1)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (1.4.7)
Requirement already satisfied: pillow>=8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (3.1.4)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>3.7.4->autoviz) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=2.0->autoviz) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=2.0->autoviz) (2024.1)
Requirement already satisfied: bleach in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (4.1.0)
Requirement already satisfied: linkify-it-py in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (2.0.3)
Requirement already satisfied: markdown in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (3.7)
Requirement already satisfied: markdown-it-py in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (3.0.0)
Requirement already satisfied: mdit-py-plugins in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (0.4.2)
Requirement already satisfied: requests in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (2.32.3)
Requirement already satisfied: tqdm in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from panel>=1.4.0->autoviz) (4.66.5)
Requirement already satisfied: scipy>=1.5.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn->autoviz) (1.13.1)
Requirement already satisfied: joblib>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn->autoviz) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn->autoviz) (3.5.0)
Requirement already satisfied: click in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from nltk->autoviz) (8.1.7)
Requirement already satisfied: regex>=2021.8.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from nltk->autoviz) (2021.11.10)
Requirement already satisfied: patsy>=0.5.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from statsmodels->autoviz) (0.5.6)
Requirement already satisfied: Jinja2>=2.9 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bokeh>=3.1->holoviews>=1.16.0->autoviz) (3.1.4)
Requirement already satisfied: PyYAML>=3.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bokeh>=3.1->holoviews>=1.16.0->autoviz) (6.0.1)
Requirement already satisfied: tornado>=6.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bokeh>=3.1->holoviews>=1.16.0->autoviz) (6.4.1)
Requirement already satisfied: xyzservices>=2021.09.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bokeh>=3.1->holoviews>=1.16.0->autoviz) (2024.9.0)
Requirement already satisfied: six in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from patsy>=0.5.6->statsmodels->autoviz) (1.16.0)
Requirement already satisfied: webencodings in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from bleach->panel>=1.4.0->autoviz) (0.5.1)
Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from click->nltk->autoviz) (0.4.6)
Requirement already satisfied: uc-micro-py in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from linkify-it-py->panel>=1.4.0->autoviz) (1.0.3)
Requirement already satisfied: mdurl~=0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from markdown-it-py->panel>=1.4.0->autoviz) (0.1.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->panel>=1.4.0->autoviz) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->panel>=1.4.0->autoviz) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->panel>=1.4.0->autoviz) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->panel>=1.4.0->autoviz) (2024.8.30)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from Jinja2>=2.9->bokeh>=3.1->holoviews>=1.16.0->autoviz) (2.1.3)
Downloading xgboost-1.6.2-py3-none-win_amd64.whl (125.4 MB)
---------------------------------------- 0.0/125.4 MB ? eta -:--:--
---------------------------------------- 0.8/125.4 MB 6.7 MB/s eta 0:00:19
- -------------------------------------- 5.5/125.4 MB 21.0 MB/s eta 0:00:06
----- ---------------------------------- 16.3/125.4 MB 31.1 MB/s eta 0:00:04
------- -------------------------------- 24.4/125.4 MB 34.3 MB/s eta 0:00:03
---------- ----------------------------- 32.0/125.4 MB 34.4 MB/s eta 0:00:03
------------ --------------------------- 38.3/125.4 MB 34.3 MB/s eta 0:00:03
-------------- ------------------------- 44.8/125.4 MB 33.6 MB/s eta 0:00:03
---------------- ----------------------- 51.4/125.4 MB 33.4 MB/s eta 0:00:03
------------------ --------------------- 57.9/125.4 MB 33.3 MB/s eta 0:00:03
-------------------- ------------------- 64.7/125.4 MB 33.6 MB/s eta 0:00:02
---------------------- ----------------- 71.6/125.4 MB 33.6 MB/s eta 0:00:02
------------------------- -------------- 78.9/125.4 MB 33.6 MB/s eta 0:00:02
--------------------------- ------------ 86.0/125.4 MB 33.5 MB/s eta 0:00:02
----------------------------- ---------- 92.8/125.4 MB 33.6 MB/s eta 0:00:01
------------------------------- -------- 99.4/125.4 MB 33.4 MB/s eta 0:00:01
-------------------------------- ------ 105.9/125.4 MB 33.5 MB/s eta 0:00:01
----------------------------------- --- 113.2/125.4 MB 33.5 MB/s eta 0:00:01
------------------------------------ -- 117.7/125.4 MB 33.4 MB/s eta 0:00:01
-------------------------------------- 123.7/125.4 MB 32.4 MB/s eta 0:00:01
--------------------------------------- 125.4/125.4 MB 32.6 MB/s eta 0:00:00
Downloading bokeh-3.5.2-py3-none-any.whl (6.8 MB)
---------------------------------------- 0.0/6.8 MB ? eta -:--:--
------------------------------ --------- 5.2/6.8 MB 24.5 MB/s eta 0:00:01
---------------------------------------- 6.8/6.8 MB 27.8 MB/s eta 0:00:00
Installing collected packages: xgboost, bokeh
Attempting uninstall: xgboost
Found existing installation: xgboost 2.1.1
Uninstalling xgboost-2.1.1:
Successfully uninstalled xgboost-2.1.1
Attempting uninstall: bokeh
Found existing installation: bokeh 2.4.3
Uninstalling bokeh-2.4.3:
Successfully uninstalled bokeh-2.4.3
Successfully installed bokeh-3.5.2 xgboost-1.6.2
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. pdpbox 0.3.0 requires xgboost>=1.7.1, but you have xgboost 1.6.2 which is incompatible. dataprep 0.4.5 requires bokeh<3,>=2, but you have bokeh 3.5.2 which is incompatible. dataprep 0.4.5 requires jinja2<3.1,>=3.0, but you have jinja2 3.1.4 which is incompatible. dataprep 0.4.5 requires pandas<2.0,>=1.1, but you have pandas 2.2.3 which is incompatible.
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: seaborn in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.13.2) Requirement already satisfied: missingno in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.5.2) Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from seaborn) (1.26.3) Requirement already satisfied: pandas>=1.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from seaborn) (2.2.3) Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from seaborn) (3.9.2) Requirement already satisfied: scipy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from missingno) (1.13.1) Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.0) Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.53.1) Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.7) Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.1) Requirement already satisfied: pillow>=8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.2.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.1.4) Requirement 
already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.2->seaborn) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.2->seaborn) (2024.1) Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0) Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: pandas in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (2.2.3) Requirement already satisfied: numpy>=1.22.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas) (1.26.3) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: sweetviz in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (2.3.1) Requirement already satisfied: pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (2.2.3) Requirement already satisfied: numpy>=1.16.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from 
sweetviz) (1.26.3) Requirement already satisfied: matplotlib>=3.1.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (3.9.2) Requirement already satisfied: tqdm>=4.43.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (4.66.5) Requirement already satisfied: scipy>=1.3.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (1.13.1) Requirement already satisfied: jinja2>=2.11.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (3.1.4) Requirement already satisfied: importlib-resources>=1.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sweetviz) (6.4.5) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from jinja2>=2.11.1->sweetviz) (2.1.3) Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (1.3.0) Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (4.53.1) Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (1.4.7) Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (24.1) Requirement already satisfied: pillow>=8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (10.2.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (3.1.4) Requirement already satisfied: 
python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.1.3->sweetviz) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->sweetviz) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->sweetviz) (2024.1) Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tqdm>=4.43.0->sweetviz) (0.4.6) Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.1.3->sweetviz) (1.16.0) Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: jinja2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (3.1.4) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from jinja2) (2.1.3) Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: pylint==2.11.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (2.11.1) Requirement already satisfied: platformdirs>=2.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (3.10.0) Requirement already satisfied: astroid<2.9,>=2.8.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (2.8.6) Requirement already satisfied: isort<6,>=4.2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (5.13.2) Requirement already satisfied: mccabe<0.7,>=0.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (0.6.1) Requirement already satisfied: toml>=0.7.1 in 
c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (0.10.2) Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pylint==2.11.1) (0.4.6) Requirement already satisfied: lazy-object-proxy>=1.4.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from astroid<2.9,>=2.8.0->pylint==2.11.1) (1.10.0) Requirement already satisfied: wrapt<1.14,>=1.11 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from astroid<2.9,>=2.8.0->pylint==2.11.1) (1.13.3) Requirement already satisfied: setuptools>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from astroid<2.9,>=2.8.0->pylint==2.11.1) (72.1.0)
In [ ]:
# <H2>Installing and Importing Libraries</H2>
In [1]:
# Importing the Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import sweetviz as sv
import tidypy
import seaborn as sns
import missingno as msno
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from autoviz.AutoViz_Class import AutoViz_Class
Imported v0.1.905. Please call AutoViz in this sequence:
AV = AutoViz_Class()
%matplotlib inline
dfte = AV.AutoViz(filename, sep=',', depVar='', dfte=None, header=0, verbose=1, lowess=False,
chart_format='svg',max_rows_analyzed=150000,max_cols_analyzed=30, save_plot_dir=None)
In [2]:
# Hiding all warnings
warnings.filterwarnings('ignore')
In [3]:
# <H2>Data Collection and Preprocessing</H2>
In [4]:
# Read the Pima Indians diabetes CSV straight from GitHub. The column labels
# are supplied here, so every row of the file is parsed as data.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
column_names = [
    'pregnancies',
    'glucose',
    'blood_pressure',
    'skin_thickness',
    'insulin',
    'bmi',
    'diabetes_pedigree_function',
    'age',
    'outcome',
]
diabetes_df = pd.read_csv(url, header=None, names=column_names)
In [5]:
# <H2>Handling Missing Values</H2>
In [6]:
# Inspecting the dataset
# DataFrame.info() writes its summary directly to stdout and returns None, so
# it is called on its own; wrapping it in print() emitted a stray "None" line
# after the summary.
diabetes_df.info()
print(diabetes_df.head())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 768 entries, 0 to 767 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 pregnancies 768 non-null int64 1 glucose 768 non-null int64 2 blood_pressure 768 non-null int64 3 skin_thickness 768 non-null int64 4 insulin 768 non-null int64 5 bmi 768 non-null float64 6 diabetes_pedigree_function 768 non-null float64 7 age 768 non-null int64 8 outcome 768 non-null int64 dtypes: float64(2), int64(7) memory usage: 54.1 KB None pregnancies glucose blood_pressure skin_thickness insulin bmi \ 0 6 148 72 35 0 33.6 1 1 85 66 29 0 26.6 2 8 183 64 0 0 23.3 3 1 89 66 23 94 28.1 4 0 137 40 35 168 43.1 diabetes_pedigree_function age outcome 0 0.627 50 1 1 0.351 31 0 2 0.672 32 1 3 0.167 21 0 4 2.288 33 1
In [7]:
# Null counts on the frame as loaded. Zeros have not been converted yet, so
# this only reports values that are literally NaN.
print(diabetes_df.isna().sum())

# The notebook treats a 0 in these measurement columns as "not recorded":
# convert those zeros to NaN in one vectorised step so they can be imputed.
zero_not_accepted = ['glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi']
diabetes_df[zero_not_accepted] = diabetes_df[zero_not_accepted].replace(0, np.nan)

# Fill each NaN with the median of its own column.
imputer = SimpleImputer(strategy='median')
diabetes_df[zero_not_accepted] = imputer.fit_transform(diabetes_df[zero_not_accepted])

# Confirm nothing is left missing after imputation.
print(diabetes_df.isna().sum())
pregnancies 0 glucose 0 blood_pressure 0 skin_thickness 0 insulin 0 bmi 0 diabetes_pedigree_function 0 age 0 outcome 0 dtype: int64 pregnancies 0 glucose 0 blood_pressure 0 skin_thickness 0 insulin 0 bmi 0 diabetes_pedigree_function 0 age 0 outcome 0 dtype: int64
In [8]:
# <H2>Preprocessing Pipelines</H2>
In [9]:
# Defining numerical and categorical features
numerical_features = ['pregnancies', 'glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi', 'diabetes_pedigree_function', 'age']
categorical_features = []  # No categorical features in this dataset

# Defining numerical pipeline (standardisation only)
numerical_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Combining the pipelines using ColumnTransformer; only the columns listed in
# numerical_features are passed through to the output.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_features)
    ]
)

# Splitting the data into features (X) and target (y)
X = diabetes_df.drop(columns=['outcome'])
y = diabetes_df['outcome']

# Split BEFORE fitting the scaler. Fitting on the full dataset and splitting
# afterwards lets the test rows influence the scaling statistics (data
# leakage). test_size and random_state are unchanged, so the same rows land
# in each partition as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling parameters from the training rows only, then apply the
# fitted transform to the held-out rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Kept so that any later cell reading X_preprocessed still finds it; it is the
# full feature matrix transformed with the train-fitted preprocessor.
X_preprocessed = preprocessor.transform(X)

print("Preprocessed Train Data Shape:", X_train.shape)
print("Preprocessed Test Data Shape:", X_test.shape)
Preprocessed Train Data Shape: (614, 8) Preprocessed Test Data Shape: (154, 8)
In [10]:
# <H2>AutoEDA using Sweetviz</H2>
In [11]:
# Profile the whole frame with Sweetviz ...
report = sv.analyze(source=diabetes_df)
# ... and write the resulting report to an HTML file next to the notebook.
report.show_html(filepath="diabetes_sweetviz_report.html")
| | [ 0%] 00:00 -> (? left)
Report diabetes_sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
In [12]:
# <H2>AutoEDA using AutoViz</H2>
In [13]:
# Generating the AutoViz report
autoviz = AutoViz_Class()
%matplotlib inline
autoviz_report = autoviz.AutoViz(filename='', sep=',', depVar='', dfte=diabetes_df, header=0, verbose=0, lowess=False,
chart_format='svg', max_rows_analyzed=1500, max_cols_analyzed=30, save_plot_dir=None)
Shape of your Data Set loaded: (768, 9)
#######################################################################################
######################## C L A S S I F Y I N G V A R I A B L E S ####################
#######################################################################################
Classifying variables in data set...
9 Predictors classified...
No variables removed since no ID or low-information variables found in data set
All variables classified into correct types.
| Data Type | Missing Values% | Unique Values% | Minimum Value | Maximum Value | DQ Issue | |
|---|---|---|---|---|---|---|
| pregnancies | int64 | 0.000000 | 2 | 0.000000 | 17.000000 | Column has 4 outliers greater than upper bound (13.50) or lower than lower bound(-6.50). Cap them or remove them. |
| glucose | float64 | 0.000000 | NA | 44.000000 | 199.000000 | No issue |
| blood_pressure | float64 | 0.000000 | NA | 24.000000 | 122.000000 | Column has 14 outliers greater than upper bound (104.00) or lower than lower bound(40.00). Cap them or remove them. |
| skin_thickness | float64 | 0.000000 | NA | 7.000000 | 99.000000 | Column has 87 outliers greater than upper bound (42.50) or lower than lower bound(14.50). Cap them or remove them. |
| insulin | float64 | 0.000000 | NA | 14.000000 | 846.000000 | Column has 346 outliers greater than upper bound (135.88) or lower than lower bound(112.88). Cap them or remove them. |
| bmi | float64 | 0.000000 | NA | 18.200000 | 67.100000 | Column has 8 outliers greater than upper bound (50.25) or lower than lower bound(13.85). Cap them or remove them. |
| diabetes_pedigree_function | float64 | 0.000000 | NA | 0.078000 | 2.420000 | Column has 29 outliers greater than upper bound (1.20) or lower than lower bound(-0.33). Cap them or remove them. |
| age | int64 | 0.000000 | 6 | 21.000000 | 81.000000 | Column has 9 outliers greater than upper bound (66.50) or lower than lower bound(-1.50). Cap them or remove them. |
| outcome | int64 | 0.000000 | 0 | 0.000000 | 1.000000 | No issue |
Number of All Scatter Plots = 21
All Plots done Time to run AutoViz = 4 seconds ###################### AUTO VISUALIZATION Completed ########################
In [14]:
# <H2>Feature Engineering using Featuretools</H2>
In [15]:
import featuretools as ft

# Build an EntitySet holding the single diabetes table, keyed on a column
# named 'index'.
# NOTE(review): diabetes_df has no 'index' column when it is loaded, yet the
# later diabetes_df.head() output shows one, so the frame appears to gain that
# column as a side effect of this call. Confirm before relying on
# diabetes_df's column set further down.
es = ft.EntitySet(id='data').add_dataframe(
    dataframe_name='data',
    dataframe=diabetes_df,
    index='index',
)

# Run Deep Feature Synthesis against that one table.
feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_dataframe_name='data',
)

# Report how many feature columns came back and what they are called.
print('After selecting features')
num_features = len(feature_matrix.columns)
print(f"Number of features: {num_features}")
print("Column names:", feature_matrix.columns)
After selecting features
Number of features: 9
Column names: Index(['pregnancies', 'glucose', 'blood_pressure', 'skin_thickness', 'insulin',
'bmi', 'diabetes_pedigree_function', 'age', 'outcome'],
dtype='object')
In [16]:
# <H3>Visualize Missing Data using missingno</H3>
In [17]:
%matplotlib inline
# Visualize missing data
msno.matrix(diabetes_df)
plt.show()
msno.bar(diabetes_df)
plt.show()
msno.heatmap(diabetes_df)
plt.show()
In [18]:
# Preview the first five rows of diabetes_df as it stands at this point in the
# notebook (after the zero -> NaN replacement and median imputation above).
diabetes_df.head()
Out[18]:
| index | pregnancies | glucose | blood_pressure | skin_thickness | insulin | bmi | diabetes_pedigree_function | age | outcome | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 6 | 148.0 | 72.0 | 35.0 | 125.0 | 33.6 | 0.627 | 50 | 1 |
| 1 | 1 | 1 | 85.0 | 66.0 | 29.0 | 125.0 | 26.6 | 0.351 | 31 | 0 |
| 2 | 2 | 8 | 183.0 | 64.0 | 29.0 | 125.0 | 23.3 | 0.672 | 32 | 1 |
| 3 | 3 | 1 | 89.0 | 66.0 | 23.0 | 94.0 | 28.1 | 0.167 | 21 | 0 |
| 4 | 4 | 0 | 137.0 | 40.0 | 35.0 | 168.0 | 43.1 | 2.288 | 33 | 1 |
In [19]:
# <H1>Task: Model Selection, Training and Hyperparameter Tuning for Diabetes Dataset</H1>
# <H3>H2O AutoML</H3>
In [20]:
# Installing the library
# Installs the h2o Python package that the AutoML cells below import.
# No version is pinned; the recorded run found h2o 3.46.0.5 already installed.
!pip install h2o
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: h2o in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (3.46.0.5) Requirement already satisfied: requests in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from h2o) (2.32.3) Requirement already satisfied: tabulate in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from h2o) (0.9.0) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->h2o) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->h2o) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->h2o) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests->h2o) (2024.8.30)
In [21]:
# Importing the required libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from h2o.automl import H2OAutoML
from h2o.explanation import explain
import h2o
import pandas as pd
import numpy as np
import warnings
In [22]:
# Hide all warnings
warnings.filterwarnings('ignore')
In [23]:
# Initializing H2O cluster (connects to a running instance or starts a local one)
h2o.init()

# Loading the Diabetes dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
column_names = ['pregnancies', 'glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi', 'diabetes_pedigree_function', 'age', 'outcome']
df = pd.read_csv(url, names=column_names)

# Checking for missing values (runs before zeros are converted, so it only
# reports values that are literally NaN in the raw file)
print("Missing values in each column:")
print(df.isna().sum())

# Replace 0 values with NaN for certain features
zero_not_accepted = ['glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi']
df[zero_not_accepted] = df[zero_not_accepted].replace(0, np.nan)

# Imputing missing values with each column's median.
# The result is assigned back explicitly: calling fillna(inplace=True) on
# df[column] is chained assignment on a column selection, which pandas 2.x
# warns about and which stops updating df once Copy-on-Write is enabled.
df[zero_not_accepted] = df[zero_not_accepted].fillna(df[zero_not_accepted].median())

# Converting to H2OFrame for H2O processing
h2o_df = h2o.H2OFrame(df)

# Splitting dataset into training and test sets (80/20, seeded)
train, test = h2o_df.split_frame(ratios=[.8], seed=123)

# Converting the target column to categorical in H2OFrame so AutoML treats the
# task as classification
train['outcome'] = train['outcome'].asfactor()
test['outcome'] = test['outcome'].asfactor()

# Defining features and target
target = 'outcome'
features = list(train.columns)
features.remove(target)

# Training the H2O AutoML model
aml = H2OAutoML(max_runtime_secs=60, seed=123, max_models=5)
aml.train(x=features, y=target, training_frame=train)

# Leaderboard view
lb = aml.leaderboard
print("Leaderboard:")
print(lb.head())

# Getting the leader model
leader_model = aml.leader

# Printing the model ID and type
print("Model ID:", leader_model.model_id)
print("Model Type:", leader_model.algo)

# Printing the specific hyperparameters
print("Hyperparameters for the leader model:")
params = leader_model.params
for param, value in params.items():
    print(f"{param}: {value}")

# Printing the detailed model summary
print("Leader Model Summary:")
print(leader_model.summary())

# Making predictions on the test set
predictions = leader_model.predict(test)

# Evaluating the best model on the held-out frame
performance = leader_model.model_performance(test)
print(performance)

# Saving the best model; best_model_path is reused later to reload it
best_model_path = h2o.save_model(model=leader_model, path="./best_diabetes_model", force=True)
print(f"Model saved to: {best_model_path}")
Checking whether there is an H2O instance running at http://localhost:54321..... not found. Attempting to start a local H2O server... ; Java HotSpot(TM) 64-Bit Server VM (build 23+37-2369, mixed mode, sharing) Starting server from C:\Users\manis\anaconda3\envs\CAIAssignment21\Lib\site-packages\h2o\backend\bin\h2o.jar Ice root: C:\Users\manis\AppData\Local\Temp\tmpy5uzehm9 JVM stdout: C:\Users\manis\AppData\Local\Temp\tmpy5uzehm9\h2o_manis_started_from_python.out JVM stderr: C:\Users\manis\AppData\Local\Temp\tmpy5uzehm9\h2o_manis_started_from_python.err Server is running at http://127.0.0.1:54321 Connecting to H2O server at http://127.0.0.1:54321 ... successful.
| H2O_cluster_uptime: | 01 secs |
| H2O_cluster_timezone: | Asia/Kolkata |
| H2O_data_parsing_timezone: | UTC |
| H2O_cluster_version: | 3.46.0.5 |
| H2O_cluster_version_age: | 25 days |
| H2O_cluster_name: | H2O_from_python_manis_71hfe8 |
| H2O_cluster_total_nodes: | 1 |
| H2O_cluster_free_memory: | 15.77 Gb |
| H2O_cluster_total_cores: | 32 |
| H2O_cluster_allowed_cores: | 32 |
| H2O_cluster_status: | locked, healthy |
| H2O_connection_url: | http://127.0.0.1:54321 |
| H2O_connection_proxy: | {"http": null, "https": null} |
| H2O_internal_security: | False |
| Python_version: | 3.10.14 final |
Missing values in each column:
pregnancies 0
glucose 0
blood_pressure 0
skin_thickness 0
insulin 0
bmi 0
diabetes_pedigree_function 0
age 0
outcome 0
dtype: int64
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |
22:55:49.673: AutoML: XGBoost is not available; skipping it.
███████████████████████████████████████████████████████████████| (done) 100%
Leaderboard:
model_id auc logloss aucpr mean_per_class_error rmse mse
StackedEnsemble_AllModels_1_AutoML_1_20240923_225549 0.845825 0.459673 0.724099 0.233751 0.387529 0.150179
GLM_1_AutoML_1_20240923_225549 0.844965 0.458449 0.733648 0.235343 0.387601 0.150234
StackedEnsemble_BestOfFamily_1_AutoML_1_20240923_225549 0.843989 0.46193 0.72594 0.231225 0.387274 0.149981
GBM_3_AutoML_1_20240923_225549 0.831345 0.478111 0.707057 0.228535 0.39561 0.156507
DRF_1_AutoML_1_20240923_225549 0.829625 0.527775 0.708123 0.246926 0.396465 0.157184
GBM_2_AutoML_1_20240923_225549 0.824788 0.486066 0.691543 0.238966 0.400853 0.160683
GBM_1_AutoML_1_20240923_225549 0.823171 0.485168 0.681923 0.251208 0.400555 0.160445
[7 rows x 7 columns]
Model ID: StackedEnsemble_AllModels_1_AutoML_1_20240923_225549
Model Type: stackedensemble
Hyperparameters for the leader model:
model_id: {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'ModelKeyV3', 'schema_type': 'Key<Model>'}, 'name': 'StackedEnsemble_AllModels_1_AutoML_1_20240923_225549', 'type': 'Key<Model>', 'URL': '/3/Models/StackedEnsemble_AllModels_1_AutoML_1_20240923_225549'}, 'input': None}
training_frame: {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'AutoML_1_20240923_225549_training_py_4_sid_b206', 'type': 'Key<Frame>', 'URL': '/3/Frames/AutoML_1_20240923_225549_training_py_4_sid_b206'}, 'input': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'AutoML_1_20240923_225549_training_py_4_sid_b206', 'type': 'Key<Frame>', 'URL': '/3/Frames/AutoML_1_20240923_225549_training_py_4_sid_b206'}}
response_column: {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'ColSpecifierV3', 'schema_type': 'VecSpecifier'}, 'column_name': 'outcome', 'is_member_of_frames': None}, 'input': {'__meta': {'schema_version': 3, 'schema_name': 'ColSpecifierV3', 'schema_type': 'VecSpecifier'}, 'column_name': 'outcome', 'is_member_of_frames': None}}
validation_frame: {'default': None, 'actual': None, 'input': None}
blending_frame: {'default': None, 'actual': None, 'input': None}
base_models: {'default': [], 'actual': [{'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GLM_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_3_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'DRF_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_2_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}], 'input': [{'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GLM_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_3_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'DRF_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_2_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}, {'__meta': {'schema_version': 3, 'schema_name': 'KeyV3', 'schema_type': 'Key<Keyed>'}, 'name': 'GBM_1_AutoML_1_20240923_225549', 'type': 'Key<Keyed>', 'URL': None}]}
metalearner_algorithm: {'default': 'AUTO', 'actual': 'glm', 'input': 'AUTO'}
metalearner_nfolds: {'default': 0, 'actual': 5, 'input': 5}
metalearner_fold_assignment: {'default': None, 'actual': None, 'input': None}
metalearner_fold_column: {'default': None, 'actual': None, 'input': None}
metalearner_params: {'default': '', 'actual': '', 'input': ''}
metalearner_transform: {'default': 'NONE', 'actual': 'Logit', 'input': 'Logit'}
max_runtime_secs: {'default': 0.0, 'actual': 0.0, 'input': 0.0}
weights_column: {'default': None, 'actual': None, 'input': None}
offset_column: {'default': None, 'actual': None, 'input': None}
custom_metric_func: {'default': None, 'actual': None, 'input': None}
seed: {'default': -1, 'actual': 129, 'input': 129}
score_training_samples: {'default': 10000, 'actual': 10000, 'input': 10000}
keep_levelone_frame: {'default': False, 'actual': True, 'input': True}
export_checkpoints_dir: {'default': None, 'actual': None, 'input': None}
auc_type: {'default': 'AUTO', 'actual': 'AUTO', 'input': 'AUTO'}
gainslift_bins: {'default': -1, 'actual': -1, 'input': -1}
Leader Model Summary:
Model Summary for Stacked Ensemble:
key value
------------------------------------ ----------------
Stacking strategy cross_validation
Number of base models (used / total) 3/5
# GBM base models (used / total) 1/3
# GLM base models (used / total) 1/1
# DRF base models (used / total) 1/1
Metalearner algorithm GLM
Metalearner fold assignment scheme Random
Metalearner nfolds 5
Metalearner fold_column
Custom metalearner hyperparameters None
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
ModelMetricsBinomialGLM: stackedensemble
** Reported on test data. **
MSE: 0.16892739951477156
RMSE: 0.4110077852240412
LogLoss: 0.5189223393152905
AUC: 0.8190416141235813
AUCPR: 0.6619127486368385
Gini: 0.6380832282471627
Null degrees of freedom: 164
Residual degrees of freedom: 161
Null deviance: 217.9068262006641
Residual deviance: 171.2443719740459
AIC: 179.2443719740459
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.32807776772900576
0 1 Error Rate
----- --- --- ------- ------------
0 75 29 0.2788 (29.0/104.0)
1 10 51 0.1639 (10.0/61.0)
Total 85 80 0.2364 (39.0/165.0)
Maximum Metrics: Maximum metrics at their respective thresholds
metric threshold value idx
--------------------------- ----------- -------- -----
max f1 0.328078 0.723404 79
max f2 0.165585 0.821727 114
max f0point5 0.43457 0.682594 57
max accuracy 0.347928 0.769697 74
max precision 0.819301 0.8125 15
max recall 0.020867 1 153
max specificity 0.976962 0.990385 0
max absolute_mcc 0.328078 0.538212 79
max min_per_class_accuracy 0.35604 0.75 71
max mean_per_class_accuracy 0.328078 0.77861 79
max tns 0.976962 103 0
max fns 0.976962 61 0
max fps 0.00433073 104 164
max tps 0.020867 61 153
max tnr 0.976962 0.990385 0
max fnr 0.976962 1 0
max fpr 0.00433073 1 164
max tpr 0.020867 1 153
Gains/Lift Table: Avg response rate: 36.97 %, avg score: 36.41 %
group cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov
------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------
1 0.0121212 0.940318 1.35246 1.35246 0.5 0.959759 0.5 0.959759 0.0163934 0.0163934 35.2459 35.2459 0.00677806
2 0.0242424 0.93037 1.35246 1.35246 0.5 0.934927 0.5 0.947343 0.0163934 0.0327869 35.2459 35.2459 0.0135561
3 0.030303 0.924322 2.70492 1.62295 1 0.929279 0.6 0.94373 0.0163934 0.0491803 170.492 62.2951 0.0299496
4 0.0424242 0.917628 2.70492 1.93208 1 0.92373 0.714286 0.938016 0.0327869 0.0819672 170.492 93.2084 0.0627364
5 0.0545455 0.901943 1.35246 1.80328 0.5 0.907954 0.666667 0.931335 0.0163934 0.0983607 35.2459 80.3279 0.0695145
6 0.10303 0.810341 2.3668 2.06847 0.875 0.854651 0.764706 0.895249 0.114754 0.213115 136.68 106.847 0.174653
7 0.151515 0.723868 1.69057 1.94754 0.625 0.767484 0.72 0.854364 0.0819672 0.295082 69.0574 94.7541 0.227774
8 0.2 0.653378 2.3668 2.04918 0.875 0.683284 0.757576 0.81289 0.114754 0.409836 136.68 104.918 0.332913
9 0.30303 0.483757 1.59113 1.89344 0.588235 0.560331 0.7 0.72702 0.163934 0.57377 59.1128 89.3443 0.42954
10 0.4 0.397524 1.1834 1.72131 0.4375 0.434047 0.636364 0.655996 0.114754 0.688525 18.3402 72.1311 0.457755
11 0.50303 0.296656 1.43202 1.66206 0.529412 0.347203 0.614458 0.592749 0.147541 0.836066 43.2015 66.2058 0.528373
12 0.6 0.227298 0.507172 1.47541 0.1875 0.258085 0.545455 0.538662 0.0491803 0.885246 -49.2828 47.541 0.452554
13 0.69697 0.15997 0.845287 1.38774 0.3125 0.19112 0.513043 0.490308 0.0819672 0.967213 -15.4713 38.7741 0.428752
14 0.8 0.100423 0 1.20902 0 0.130993 0.44697 0.444033 0 0.967213 -100 20.9016 0.26529
15 0.89697 0.0437803 0.169057 1.09659 0.0625 0.0736188 0.405405 0.403988 0.0163934 0.983607 -83.0943 9.65884 0.137453
16 1 0.00433073 0.159113 1 0.0588235 0.0169509 0.369697 0.364112 0.0163934 1 -84.0887 0 0
Model saved to: C:\Users\manis\MLOPs_assignment_2\Final\best_diabetes_model\StackedEnsemble_AllModels_1_AutoML_1_20240923_225549
In [24]:
# <H2>Explainable Model using H2O tools</H2>
In [25]:
# Explaining model using H2O's built-in tools.
# explain() renders the confusion matrix, learning curve and partial
# dependence plots itself while it runs. The returned H2OExplanation is kept
# for later inspection but not printed, because print() only emits a long
# dump of internal object reprs after the rendered output.
explanations = explain(leader_model, test)
Confusion Matrix
Confusion matrix shows a predicted class vs an actual class.
StackedEnsemble_AllModels_1_AutoML_1_20240923_225549
| 0 | 1 | Error | Rate | |
|---|---|---|---|---|
| 0 | 75.0 | 29.0 | 0.2788 | (29.0/104.0) |
| 1 | 10.0 | 51.0 | 0.1639 | (10.0/61.0) |
| Total | 85.0 | 80.0 | 0.2364 | (39.0/165.0) |
Learning Curve Plot
Learning curve plot shows the loss function/metric dependent on number of iterations or trees for tree-based algorithms. This plot can be useful for determining whether the model overfits.
Partial Dependence Plots
Partial dependence plot (PDP) gives a graphical depiction of the marginal effect of a variable on the response. The effect of a variable is measured in change in the mean response. PDP assumes independence between the feature for which is the PDP computed and the rest.
H2OExplanation([('confusion_matrix', H2OExplanation([('header', <h2o.explanation._explain.Header object at 0x000001E8007881F0>), ('description', <h2o.explanation._explain.Description object at 0x000001E87BC4F850>), ('subexplanations', H2OExplanation([('StackedEnsemble_AllModels_1_AutoML_1_20240923_225549', H2OExplanation([('header', <h2o.explanation._explain.Header object at 0x000001E80025CD60>), ('plots', H2OExplanation([('StackedEnsemble_AllModels_1_AutoML_1_20240923_225549', ConfusionMatrix({'table': H2OTwoDimTable({'_table_header': 'Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.32807776772900576', '_col_header': ['', '0', '1', 'Error', 'Rate']})}))]))]))]))])), ('learning_curve', H2OExplanation([('header', <h2o.explanation._explain.Header object at 0x000001E80025DBA0>), ('description', <h2o.explanation._explain.Description object at 0x000001E80025DC00>), ('plots', H2OExplanation([('StackedEnsemble_AllModels_1_AutoML_1_20240923_225549', <h2o.plot._plot_result._MObject object at 0x000001E801109930>)]))])), ('pdp', H2OExplanation([('header', <h2o.explanation._explain.Header object at 0x000001E80025C1C0>), ('description', <h2o.explanation._explain.Description object at 0x000001E80025EB60>), ('plots', H2OExplanation([('pregnancies', <h2o.plot._plot_result._MObject object at 0x000001E8011704C0>), ('glucose', <h2o.plot._plot_result._MObject object at 0x000001E80042FDF0>), ('blood_pressure', <h2o.plot._plot_result._MObject object at 0x000001E8012190C0>), ('skin_thickness', <h2o.plot._plot_result._MObject object at 0x000001E8012CE620>), ('insulin', <h2o.plot._plot_result._MObject object at 0x000001E80138FB50>), ('bmi', <h2o.plot._plot_result._MObject object at 0x000001E80133F460>), ('diabetes_pedigree_function', <h2o.plot._plot_result._MObject object at 0x000001E8014DFF70>), ('age', <h2o.plot._plot_result._MObject object at 0x000001E80162F730>)]))]))])
In [26]:
# Reload the persisted leader model from disk and score the held-out frame.
loaded_model = h2o.load_model(best_model_path)
predictions = loaded_model.predict(test)

# Bring the H2O prediction frame into pandas.
predictions_df = predictions.as_data_frame()

# Derive a class label by cutting the class-1 probability column 'p1' at 0.5.
# This is separate from H2O's own 'predict' column; in the recorded output the
# two disagree on some rows (e.g. predict=1 while p1 is about 0.41).
predictions_df['predicted_class'] = predictions_df['p1'].ge(0.5).astype(int)

# Show the first few predictions.
print(predictions_df.head())

# Pull the ground-truth labels out of the H2O frame as a flat array.
true_labels = test[target].as_data_frame()
true_labels_df = true_labels.to_numpy().ravel()

# Accuracy of the 0.5-threshold labels.
accuracy = accuracy_score(true_labels_df, predictions_df['predicted_class'])
print(f'Accuracy: {accuracy}')

# Confusion matrix of the 0.5-threshold labels.
conf_matrix = confusion_matrix(true_labels_df, predictions_df['predicted_class'])
print('Confusion Matrix:')
print(conf_matrix)
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100% predict p0 p1 predicted_class 0 1 0.180699 0.819301 1 1 1 0.589277 0.410723 0 2 1 0.397333 0.602667 1 3 0 0.951857 0.048143 0 4 1 0.564928 0.435072 0 Accuracy: 0.7393939393939394 Confusion Matrix: [[89 15] [28 33]]
In [27]:
# <H2>Evaluating the Model</H2>
In [28]:
# Score the 0.5-threshold labels in predictions_df against the true labels.
predicted_class = predictions_df['predicted_class']

# Fraction of test rows classified correctly.
accuracy = accuracy_score(true_labels_df, predicted_class)
print(f'Accuracy: {accuracy}')

# Actual-vs-predicted counts.
conf_matrix = confusion_matrix(true_labels_df, predicted_class)
print('Confusion Matrix:')
print(conf_matrix)
Accuracy: 0.7393939393939394 Confusion Matrix: [[89 15] [28 33]]
In [62]:
# Shutdown the H2O cluster.
# The module-level h2o.shutdown() is deprecated; the cluster handle returned
# by h2o.cluster() is the supported way to stop the server.
h2o.cluster().shutdown()
H2O session _sid_b206 closed.
In [29]:
# <H1>Task: Explainable AI (XAI) Implementation for Diabetes Dataset</H1>
In [30]:
# Installing the libraries
!pip install lime
!pip install anchor-exp
!pip install pdpbox
!pip install pdpbox
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: lime in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.2.0.1)
Requirement already satisfied: matplotlib in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (3.9.2)
Requirement already satisfied: numpy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (1.26.3)
Requirement already satisfied: scipy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (1.13.1)
Requirement already satisfied: tqdm in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (4.66.5)
Requirement already satisfied: scikit-learn>=0.18 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (1.3.0)
Requirement already satisfied: scikit-image>=0.12 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime) (0.24.0)
Requirement already satisfied: networkx>=2.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (3.2.1)
Requirement already satisfied: pillow>=9.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (10.2.0)
Requirement already satisfied: imageio>=2.33 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (2.35.1)
Requirement already satisfied: tifffile>=2022.8.12 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (2024.9.20)
Requirement already satisfied: packaging>=21 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (24.1)
Requirement already satisfied: lazy-loader>=0.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime) (0.4)
Requirement already satisfied: joblib>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=0.18->lime) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=0.18->lime) (3.5.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (1.3.0)
Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (4.53.1)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (1.4.7)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (3.1.4)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime) (2.9.0.post0)
Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tqdm->lime) (0.4.6)
Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib->lime) (1.16.0)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: anchor-exp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.0.2.0)
Requirement already satisfied: numpy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from anchor-exp) (1.26.3)
Requirement already satisfied: scipy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from anchor-exp) (1.13.1)
Requirement already satisfied: spacy in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from anchor-exp) (3.7.6)
Requirement already satisfied: lime in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from anchor-exp) (0.2.0.1)
Requirement already satisfied: scikit-learn>=0.22 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from anchor-exp) (1.3.0)
Requirement already satisfied: joblib>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=0.22->anchor-exp) (1.4.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=0.22->anchor-exp) (3.5.0)
Requirement already satisfied: matplotlib in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime->anchor-exp) (3.9.2)
Requirement already satisfied: tqdm in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime->anchor-exp) (4.66.5)
Requirement already satisfied: scikit-image>=0.12 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from lime->anchor-exp) (0.24.0)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (3.0.12)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (1.0.5)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (1.0.10)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (2.0.8)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (3.0.9)
Requirement already satisfied: thinc<8.3.0,>=8.2.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (8.2.5)
Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (1.1.3)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (2.4.8)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (2.0.10)
Requirement already satisfied: weasel<0.5.0,>=0.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (0.4.1)
Requirement already satisfied: typer<1.0.0,>=0.3.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (0.12.5)
Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (2.32.3)
Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (1.10.18)
Requirement already satisfied: jinja2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (3.1.4)
Requirement already satisfied: setuptools in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (72.1.0)
Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (24.1)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from spacy->anchor-exp) (3.4.0)
Requirement already satisfied: language-data>=1.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from langcodes<4.0.0,>=3.2.0->spacy->anchor-exp) (1.2.0)
Requirement already satisfied: typing-extensions>=4.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy->anchor-exp) (4.12.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy->anchor-exp) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy->anchor-exp) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy->anchor-exp) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy->anchor-exp) (2024.8.30)
Requirement already satisfied: networkx>=2.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime->anchor-exp) (3.2.1)
Requirement already satisfied: pillow>=9.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime->anchor-exp) (10.2.0)
Requirement already satisfied: imageio>=2.33 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime->anchor-exp) (2.35.1)
Requirement already satisfied: tifffile>=2022.8.12 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime->anchor-exp) (2024.9.20)
Requirement already satisfied: lazy-loader>=0.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-image>=0.12->lime->anchor-exp) (0.4)
Requirement already satisfied: blis<0.8.0,>=0.7.8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from thinc<8.3.0,>=8.2.2->spacy->anchor-exp) (0.7.11)
Requirement already satisfied: confection<1.0.0,>=0.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from thinc<8.3.0,>=8.2.2->spacy->anchor-exp) (0.1.5)
Requirement already satisfied: colorama in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from tqdm->lime->anchor-exp) (0.4.6)
Requirement already satisfied: click>=8.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy->anchor-exp) (8.1.7)
Requirement already satisfied: shellingham>=1.3.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy->anchor-exp) (1.5.4)
Requirement already satisfied: rich>=10.11.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy->anchor-exp) (13.8.1)
Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from weasel<0.5.0,>=0.1.0->spacy->anchor-exp) (0.19.0)
Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from weasel<0.5.0,>=0.1.0->spacy->anchor-exp) (7.0.4)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from jinja2->spacy->anchor-exp) (2.1.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (1.3.0)
Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (4.53.1)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (1.4.7)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (3.1.4)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib->lime->anchor-exp) (2.9.0.post0)
Requirement already satisfied: marisa-trie>=0.7.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy->anchor-exp) (1.2.0)
Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib->lime->anchor-exp) (1.16.0)
Requirement already satisfied: markdown-it-py>=2.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->anchor-exp) (3.0.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->anchor-exp) (2.18.0)
Requirement already satisfied: wrapt in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy->anchor-exp) (1.13.3)
Requirement already satisfied: mdurl~=0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy->anchor-exp) (0.1.2)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: pdpbox in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.3.0)
Requirement already satisfied: joblib>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.4.2)
Requirement already satisfied: matplotlib>=3.6.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (3.9.2)
Requirement already satisfied: numpy>=1.21.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.26.3)
Requirement already satisfied: pandas>=1.4.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (2.2.3)
Requirement already satisfied: plotly>=5.9.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (5.24.1)
Requirement already satisfied: pqdm>=0.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (0.2.0)
Requirement already satisfied: psutil>=5.9.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (6.0.0)
Requirement already satisfied: pytest in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (8.3.3)
Requirement already satisfied: scikit-learn>=1.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.3.0)
Requirement already satisfied: setuptools in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (72.1.0)
Requirement already satisfied: sphinx>=5.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (7.4.7)
Requirement already satisfied: sphinx-rtd-theme>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (2.0.0)
Requirement already satisfied: tqdm>=4.64.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (4.66.5)
Requirement already satisfied: numpydoc>=1.4.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.8.0)
Collecting xgboost>=1.7.1 (from pdpbox)
Downloading xgboost-2.1.1-py3-none-win_amd64.whl.metadata (2.1 kB)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (1.3.0)
Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (4.53.1)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (1.4.7)
Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (3.1.4)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (2.9.0.post0)
Requirement already satisfied: tabulate>=0.8.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from numpydoc>=1.4.0->pdpbox) (0.9.0)
Requirement already satisfied: tomli>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from numpydoc>=1.4.0->pdpbox) (2.0.1)
Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.4.4->pdpbox) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.4.4->pdpbox) (2024.1)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from plotly>=5.9.0->pdpbox) (8.5.0)
Requirement already satisfied: bounded-pool-executor in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pqdm>=0.2.0->pdpbox) (0.0.3)
Requirement already satisfied: typing-extensions in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pqdm>=0.2.0->pdpbox) (4.12.2)
Requirement already satisfied: scipy>=1.5.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.0.2->pdpbox) (1.13.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.0.2->pdpbox) (3.5.0)
Requirement already satisfied: sphinxcontrib-applehelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0)
Requirement already satisfied: sphinxcontrib-devhelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0)
Requirement already satisfied: sphinxcontrib-jsmath in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (1.0.1)
Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.1.0)
Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.9 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0)
Requirement already satisfied: sphinxcontrib-qthelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0)
Requirement already satisfied: Jinja2>=3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (3.1.4)
Requirement already satisfied: Pygments>=2.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.18.0)
Requirement already satisfied: docutils<0.22,>=0.20 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.20.1)
Requirement already satisfied: snowballstemmer>=2.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.2.0)
Requirement already satisfied: babel>=2.13 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.16.0)
Requirement already satisfied: alabaster~=0.7.14 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.7.16)
Requirement already satisfied: imagesize>=1.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (1.4.1)
Requirement already satisfied: requests>=2.30.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.32.3)
Requirement already satisfied: colorama>=0.4.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.4.6)
Requirement already satisfied: sphinxcontrib-jquery<5,>=4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx-rtd-theme>=1.1.1->pdpbox) (4.1)
Requirement already satisfied: iniconfig in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (2.0.0)
Requirement already satisfied: pluggy<2,>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (1.5.0)
Requirement already satisfied: exceptiongroup>=1.0.0rc8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (1.2.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from Jinja2>=3.1->sphinx>=5.0.2->pdpbox) (2.1.3)
Requirement already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.6.2->pdpbox) (1.16.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (2024.8.30)
Downloading xgboost-2.1.1-py3-none-win_amd64.whl (124.9 MB)
---------------------------------------- 0.0/124.9 MB ? eta -:--:--
---------------------------------------- 0.8/124.9 MB 8.3 MB/s eta 0:00:15
- -------------------------------------- 5.8/124.9 MB 19.6 MB/s eta 0:00:07
---- ----------------------------------- 12.8/124.9 MB 25.2 MB/s eta 0:00:05
------ --------------------------------- 19.1/124.9 MB 26.3 MB/s eta 0:00:05
-------- ------------------------------- 25.4/124.9 MB 27.3 MB/s eta 0:00:04
---------- ----------------------------- 32.0/124.9 MB 27.8 MB/s eta 0:00:04
------------ --------------------------- 38.5/124.9 MB 28.2 MB/s eta 0:00:04
-------------- ------------------------- 45.1/124.9 MB 28.4 MB/s eta 0:00:03
---------------- ----------------------- 51.4/124.9 MB 28.7 MB/s eta 0:00:03
------------------ --------------------- 57.9/124.9 MB 28.8 MB/s eta 0:00:03
-------------------- ------------------- 64.7/124.9 MB 29.1 MB/s eta 0:00:03
---------------------- ----------------- 71.6/124.9 MB 29.2 MB/s eta 0:00:02
------------------------- -------------- 78.4/124.9 MB 29.4 MB/s eta 0:00:02
--------------------------- ------------ 84.9/124.9 MB 29.5 MB/s eta 0:00:02
----------------------------- ---------- 91.5/124.9 MB 29.6 MB/s eta 0:00:02
------------------------------- -------- 98.3/124.9 MB 29.7 MB/s eta 0:00:01
-------------------------------- ------ 104.3/124.9 MB 29.7 MB/s eta 0:00:01
---------------------------------- ---- 109.6/124.9 MB 29.4 MB/s eta 0:00:01
----------------------------------- --- 114.8/124.9 MB 29.2 MB/s eta 0:00:01
------------------------------------- - 120.3/124.9 MB 29.0 MB/s eta 0:00:01
--------------------------------------- 124.9/124.9 MB 28.8 MB/s eta 0:00:00
Installing collected packages: xgboost
Attempting uninstall: xgboost
Found existing installation: xgboost 1.6.2
Uninstalling xgboost-1.6.2:
Successfully uninstalled xgboost-1.6.2
Successfully installed xgboost-2.1.1
WARNING: Failed to remove contents in a temporary directory 'C:\Users\manis\anaconda3\envs\CAIAssignment21\Lib\site-packages\~gboost'. You can safely remove it manually. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. autoviz 0.1.905 requires xgboost<1.7,>=0.82, but you have xgboost 2.1.1 which is incompatible.
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: pdpbox in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (0.3.0) Requirement already satisfied: joblib>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.4.2) Requirement already satisfied: matplotlib>=3.6.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (3.9.2) Requirement already satisfied: numpy>=1.21.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.26.3) Requirement already satisfied: pandas>=1.4.4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (2.2.3) Requirement already satisfied: plotly>=5.9.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (5.24.1) Requirement already satisfied: pqdm>=0.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (0.2.0) Requirement already satisfied: psutil>=5.9.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (6.0.0) Requirement already satisfied: pytest in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (8.3.3) Requirement already satisfied: scikit-learn>=1.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.3.0) Requirement already satisfied: setuptools in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (72.1.0) Requirement already satisfied: sphinx>=5.0.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (7.4.7) Requirement already satisfied: sphinx-rtd-theme>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (2.0.0) Requirement already satisfied: tqdm>=4.64.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (4.66.5) Requirement already satisfied: numpydoc>=1.4.0 in 
c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (1.8.0) Requirement already satisfied: xgboost>=1.7.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pdpbox) (2.1.1) Requirement already satisfied: contourpy>=1.0.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (1.3.0) Requirement already satisfied: cycler>=0.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (4.53.1) Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (1.4.7) Requirement already satisfied: packaging>=20.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (24.1) Requirement already satisfied: pillow>=8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (10.2.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (3.1.4) Requirement already satisfied: python-dateutil>=2.7 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from matplotlib>=3.6.2->pdpbox) (2.9.0.post0) Requirement already satisfied: tabulate>=0.8.10 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from numpydoc>=1.4.0->pdpbox) (0.9.0) Requirement already satisfied: tomli>=1.1.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from numpydoc>=1.4.0->pdpbox) (2.0.1) Requirement already satisfied: pytz>=2020.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.4.4->pdpbox) (2024.1) Requirement already satisfied: tzdata>=2022.7 in 
c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pandas>=1.4.4->pdpbox) (2024.1) Requirement already satisfied: tenacity>=6.2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from plotly>=5.9.0->pdpbox) (8.5.0) Requirement already satisfied: bounded-pool-executor in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pqdm>=0.2.0->pdpbox) (0.0.3) Requirement already satisfied: typing-extensions in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pqdm>=0.2.0->pdpbox) (4.12.2) Requirement already satisfied: scipy>=1.5.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.0.2->pdpbox) (1.13.1) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn>=1.0.2->pdpbox) (3.5.0) Requirement already satisfied: sphinxcontrib-applehelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0) Requirement already satisfied: sphinxcontrib-devhelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0) Requirement already satisfied: sphinxcontrib-jsmath in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (1.0.1) Requirement already satisfied: sphinxcontrib-htmlhelp>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.1.0) Requirement already satisfied: sphinxcontrib-serializinghtml>=1.1.9 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0) Requirement already satisfied: sphinxcontrib-qthelp in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.0.0) Requirement already satisfied: Jinja2>=3.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (3.1.4) 
Requirement already satisfied: Pygments>=2.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.18.0) Requirement already satisfied: docutils<0.22,>=0.20 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.20.1) Requirement already satisfied: snowballstemmer>=2.2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.2.0) Requirement already satisfied: babel>=2.13 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.16.0) Requirement already satisfied: alabaster~=0.7.14 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.7.16) Requirement already satisfied: imagesize>=1.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (1.4.1) Requirement already satisfied: requests>=2.30.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (2.32.3) Requirement already satisfied: colorama>=0.4.6 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx>=5.0.2->pdpbox) (0.4.6) Requirement already satisfied: sphinxcontrib-jquery<5,>=4 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from sphinx-rtd-theme>=1.1.1->pdpbox) (4.1) Requirement already satisfied: iniconfig in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (2.0.0) Requirement already satisfied: pluggy<2,>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (1.5.0) Requirement already satisfied: exceptiongroup>=1.0.0rc8 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from pytest->pdpbox) (1.2.0) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from Jinja2>=3.1->sphinx>=5.0.2->pdpbox) (2.1.3) Requirement 
already satisfied: six>=1.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.6.2->pdpbox) (1.16.0) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from requests>=2.30.0->sphinx>=5.0.2->pdpbox) (2024.8.30)
In [31]:
# Importing the required libraries
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer
from sklearn.model_selection import train_test_split
from anchor import anchor_tabular
from anchor import anchor_tabular
from pdpbox import pdp
from PyALE import ale
from pdpbox import pdp
import matplotlib.pyplot as plt
from sklearn.inspection import PartialDependenceDisplay
In [ ]:
In [32]:
# <H1>Implementing LIME</H1>
In [33]:
# LIME: Local Interpretable Model-agnostic Explanations.
In [34]:
# <H2>Loading the Diabetes dataset</H2>
In [ ]:
# Read the Pima Indians Diabetes CSV straight from GitHub. Column labels are
# supplied explicitly, so every row of the file is treated as data.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
column_names = [
    'pregnancies',
    'glucose',
    'blood_pressure',
    'skin_thickness',
    'insulin',
    'bmi',
    'diabetes_pedigree_function',
    'age',
    'outcome',
]
df = pd.read_csv(url, header=None, names=column_names)
In [ ]:
In [35]:
# <H2>Preprocessing the Data</H2>
In [36]:
# Columns in which a recorded 0 is treated as a missing measurement.
zero_not_accepted = ['glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi']

# Turn the placeholder zeros into NaN, then impute each column with its own median.
# The result is assigned back to the frame instead of calling
# `df[column].fillna(..., inplace=True)`: that chained in-place form is deprecated
# in pandas 2.x and stops modifying `df` once Copy-on-Write is enabled.
# NOTE(review): the medians are computed on the full dataset, i.e. before the
# train/test split below - confirm this is acceptable for the analysis.
for column in zero_not_accepted:
    with_missing = df[column].replace(0, np.nan)
    df[column] = with_missing.fillna(with_missing.median())

# Safety net: forward-fill anything that is still missing.
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`.
if df.isnull().any().any():
    print("Data contains NaN values. Filling NaN values...")
    df = df.ffill()

# No encoding is needed: every feature in this dataset is numerical.
# Separate the predictors from the target ('outcome' = presence of diabetes).
X = df.drop(columns=['outcome'])
y = df['outcome']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [ ]:
In [37]:
# <H2>Training a model using Random Forest</H2>
In [38]:
# Fit a seeded 100-tree random forest on the training split.
# The fit call stays as the cell's last expression so the estimator repr is displayed.
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)
Out[38]:
RandomForestClassifier(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=42)
In [39]:
# <H2>Creating a LIME Explainer</H2>
In [40]:
# Build the LIME tabular explainer from the training data.
# `random_state` is fixed so the perturbation sampling - and therefore the
# explanation weights printed further down - are reproducible across runs.
# Feature names are passed as a plain list rather than a pandas Index.
lime_explainer = LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=list(X_train.columns),
    class_names=['No Diabetes', 'Diabetes'],
    mode='classification',
    random_state=42,
)
In [41]:
# Pick the test-set row to explain (positional index; change it to inspect another row).
instance = X_test.iloc[5, :]
In [42]:
# Ask LIME for a local explanation of the chosen row, using the forest's class
# probabilities and reporting up to 8 features.
exp = lime_explainer.explain_instance(
    data_row=instance.values,
    predict_fn=model.predict_proba,
    num_features=8,
)
In [43]:
# Outside a Jupyter notebook, use `exp.save_to_file(...)` (shown in the next cell) to save the explanation as an HTML file.
In [44]:
# Render the explanation inline (with the feature-value table), then write the
# same explanation to a standalone HTML file.
exp.show_in_notebook(show_table=True)
exp.save_to_file(file_path='diabetes_lime_explanation.html')
In [45]:
# List every (feature rule, weight) pair LIME produced for this instance.
print("\nFeature importances for this instance:")
for rule, weight in exp.as_list():
    print(f"{rule}: {weight}")
Feature importances for this instance: age > 40.00: 0.11902477712296315 bmi > 36.38: 0.10559407748346268 100.00 < glucose <= 117.00: -0.09253983545876131 insulin > 129.75: 0.025477406331946492 64.00 < blood_pressure <= 72.00: 0.015406489683079908 29.00 < skin_thickness <= 32.00: 0.01490296113605346 0.24 < diabetes_pedigree_function <= 0.37: -0.011361647696196495 3.00 < pregnancies <= 6.00: -0.00241708231033229
In [46]:
# <H2>Implementing Anchors</H2>
In [47]:
# Build the Anchor explainer on the same training data; no feature is declared
# categorical, hence the empty `categorical_names` mapping.
anchor_explainer = anchor_tabular.AnchorTabularExplainer(
    train_data=X_train.values,
    feature_names=X_train.columns,
    class_names=['No Diabetes', 'Diabetes'],
    categorical_names={},
)
In [48]:
# Search for an anchor rule for the selected row, using a 0.95 threshold.
anchor_exp = anchor_explainer.explain_instance(instance.values, model.predict, threshold=0.95)

# Report the rule as a conjunction of its conditions, with its precision and coverage.
anchor_rule = ' AND '.join(anchor_exp.names())
anchor_precision = anchor_exp.precision()
anchor_coverage = anchor_exp.coverage()
print("\nAnchor Explanation:")
print('Anchor: %s' % anchor_rule)
print('Precision: %.2f' % anchor_precision)
print('Coverage: %.2f' % anchor_coverage)
Anchor Explanation: Anchor: age > 40.00 AND insulin > 129.75 AND bmi > 36.38 AND diabetes_pedigree_function > 0.24 AND 100.00 < glucose <= 117.00 AND blood_pressure <= 72.00 AND skin_thickness > 24.00 Precision: 0.97 Coverage: 0.00
In [49]:
# <H2>Implementing Partial Dependence Plots (PDPs)</H2>
In [50]:
def plot_pdp(model, X, features):
    """Draw partial dependence plots for one or more features.

    Args:
        model: Fitted estimator, forwarded unchanged to
            ``PartialDependenceDisplay.from_estimator``.
        X: Dataset used to compute the partial dependence (forwarded
            unchanged).
        features: List of features to plot, or a single feature name given
            as a plain string.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # A bare string would be iterated character by character when treated as
    # a list of features, so wrap it into a one-element list first.
    if isinstance(features, str):
        features = [features]

    # Only the axes is needed; scikit-learn lays out all requested plots
    # within the single axes it is given.
    _, ax = plt.subplots(figsize=(10, 6))
    PartialDependenceDisplay.from_estimator(model, X, features, ax=ax)
    plt.show()
In [51]:
# Generate PDPs for a few important features
# The same list is reused further down when the ALE plots are generated.
important_features = ['glucose', 'bmi', 'age']
plot_pdp(model, X, important_features)
In [52]:
# <H2>Implementing Accumulated Local Effects (ALE) Plots</H2>
In [53]:
def plot_ale(model, X, feature):
    """Plot the Accumulated Local Effects (ALE) of a single feature.

    Args:
        model: Model forwarded unchanged to ``ale``.
        X: Dataset forwarded unchanged to ``ale``.
        feature: The single feature to analyse; it is wrapped in a
            one-element list before being passed on.

    Returns:
        The value returned by ``ale`` for this feature, so the numbers behind
        the plot can be inspected. Callers that ignore the return value are
        unaffected.
    """
    ale_eff = ale(
        X=X,
        model=model,
        feature=[feature],
        plot=True,  # ale() draws the figure itself
        grid_size=50,
        include_CI=True,
    )
    plt.show()
    return ale_eff
In [54]:
# Generate ALE plots for the same important features
# One call (and therefore one figure) per feature, since plot_ale takes a
# single feature at a time.
for feature in important_features:
    plot_ale(model, X, feature)
PyALE._ALE_generic:INFO: Continuous feature detected.
PyALE._ALE_generic:INFO: Continuous feature detected.
PyALE._ALE_generic:INFO: Continuous feature detected.
In [55]:
# Predict the class of the explained row; the model expects a 2-D input, so
# the single row is reshaped to (1, n_features) first.
row_2d = instance.values.reshape(1, -1)
prediction = model.predict(row_2d)[0]
predicted_label = 'Diabetes' if prediction == 1 else 'No Diabetes'
print(f"\nModel prediction for this instance: {predicted_label}")
Model prediction for this instance: Diabetes
In [56]:
# Print actual outcome for this instance
# Look the label up through the explained row's own index label instead of
# repeating a hard-coded position, so the reported outcome still belongs to
# `instance` when a different row is selected.
# NOTE(review): assumes X_test and y_test share the same unique index (e.g.
# both come from one train_test_split call) — confirm.
actual = y_test.loc[instance.name]
print(f"Actual outcome for this instance: {'Diabetes' if actual == 1 else 'No Diabetes'}")
Actual outcome for this instance: No Diabetes
In [57]:
# <H2>Creating a model as a pickle file for Diabetes Prediction</H2>
!pip cache purge
!pip install scikit-learn==1.3.0
ERROR: pip cache commands can not function since cache is disabled.
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com Requirement already satisfied: scikit-learn==1.3.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (1.3.0) Requirement already satisfied: numpy>=1.17.3 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn==1.3.0) (1.26.3) Requirement already satisfied: scipy>=1.5.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn==1.3.0) (1.13.1) Requirement already satisfied: joblib>=1.1.1 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn==1.3.0) (1.4.2) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\manis\anaconda3\envs\caiassignment21\lib\site-packages (from scikit-learn==1.3.0) (3.5.0)
In [58]:
import pandas as pd
import numpy as np
import pickle
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
In [59]:
# <H2> Creating and Saving the model as pickle file</H2>
# Function to load and preprocess data
def load_and_preprocess_data(url):
    """Load the diabetes CSV and impute zeros in the measurement columns.

    The file is read without a header row; the nine column names are assigned
    here. In the five columns listed in ``zero_not_accepted`` a value of 0 is
    treated as a missing-value marker: it is replaced by NaN and then filled
    with that column's median.

    Note that the medians are computed over every row that is loaded, i.e.
    before any train/test split the caller performs afterwards.

    Args:
        url: Anything ``pd.read_csv`` accepts as its first argument (URL,
            path or file-like object).

    Returns:
        pd.DataFrame: The loaded table with the zeros in the listed columns
        imputed. A listed column that holds only zeros stays NaN because its
        median is undefined.
    """
    column_names = ['pregnancies', 'glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi', 'diabetes_pedigree_function', 'age', 'outcome']
    data = pd.read_csv(url, names=column_names)

    # Columns where 0 stands for a missing reading rather than a real value.
    zero_not_accepted = ['glucose', 'blood_pressure', 'skin_thickness', 'insulin', 'bmi']
    for column in zero_not_accepted:
        with_gaps = data[column].replace(0, np.nan)
        # Assign the result back explicitly: calling fillna(..., inplace=True)
        # on data[column] is chained assignment, which pandas deprecates and
        # which does not update `data` under copy-on-write.
        data[column] = with_gaps.fillna(with_gaps.median())
    return data
# Fetch the CSV and run it through the cleaning helper.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
data = load_and_preprocess_data(url)

# Separate the `outcome` label from the predictor columns.
y = data['outcome']
X = data.drop(columns=['outcome'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a 100-tree random forest on the scaled training data.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Persist the classifier and the scaler as two separate pickle files.
with open('diabetes_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('diabetes_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Model and scaler saved as 'diabetes_model.pkl' and 'diabetes_scaler.pkl'")
Model and scaler saved as 'diabetes_model.pkl' and 'diabetes_scaler.pkl'
In [60]:
# <H2>Model Prediction</H2>
# Restore the classifier from the pickle file written by the previous step.
with open('diabetes_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict on the scaled hold-out features and flag the rows that match the labels.
predictions = loaded_model.predict(X_test_scaled)
comparison = predictions == y_test.values

# Put scaled features, prediction, true label and hit/miss flag side by side.
df = pd.DataFrame(X_test_scaled, columns=X_train.columns).assign(
    Prediction=predictions,
    Actual=y_test.values,
    Correct=comparison,
)
print(df.head(15))
pregnancies glucose blood_pressure skin_thickness insulin bmi \
0 0.681856 -0.791466 -1.177033 0.510202 0.561935 0.237865
1 -0.526397 -0.326051 0.229156 0.391226 -0.175620 0.483180
2 -0.526397 -0.459026 -0.680731 0.034298 -0.175620 -0.223904
3 1.285983 -0.492270 0.642740 0.034298 -0.175620 -1.118582
4 0.983919 0.471804 1.469910 0.034298 -0.175620 -0.353777
5 0.681856 -0.625246 -0.018995 0.391226 0.561935 0.771785
6 -0.828460 -1.689053 -2.004202 -1.274439 -0.731622 -1.724654
7 -1.130523 -0.159831 -0.018995 0.034298 -0.175620 0.209004
8 0.077730 1.070196 -0.018995 0.034298 -0.164273 -0.151753
9 0.379793 0.837488 0.477306 0.034298 -0.175620 0.194574
10 1.890109 -0.359295 -0.184429 -0.203654 -0.175620 -0.700104
11 0.983919 1.901294 1.883494 0.272250 -0.175620 0.266725
12 0.077730 0.870732 -1.011599 -0.203654 2.014349 -0.209474
13 0.379793 -0.857954 0.146439 -1.274439 -0.833745 0.180144
14 -0.526397 -1.123906 -1.177033 -0.322631 -1.412441 -0.570231
diabetes_pedigree_function age Prediction Actual Correct
0 -0.116372 0.878091 1 0 False
1 -0.954231 -1.035940 0 0 True
2 -0.924520 -1.035940 0 0 True
3 1.149329 0.095078 0 0 True
4 -0.770021 1.487101 1 0 False
5 -0.431312 1.922108 1 0 False
6 -0.434283 -0.948939 0 0 True
7 1.375135 0.965092 1 0 False
8 -0.389716 0.356082 1 0 False
9 -0.746252 2.792122 1 0 False
10 -0.975029 0.617087 0 1 False
11 -0.906693 2.357115 1 0 False
12 -0.948289 -0.339929 0 1 False
13 1.568258 0.878091 0 0 True
14 0.881927 -0.948939 0 0 True
In [61]:
# <H2>Model Evaluation</H2>
# Compute all three summaries of the hold-out predictions up front.
conf_matrix = confusion_matrix(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)
class_report = classification_report(
    y_test,
    predictions,
    target_names=['No Diabetes', 'Diabetes'],
)

# Report them in a fixed order: matrix, accuracy, per-class report.
print("Confusion Matrix:", conf_matrix, sep="\n")
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:", class_report, sep="\n")
Confusion Matrix:
[[78 21]
[20 35]]
Accuracy: 0.7338
Classification Report:
precision recall f1-score support
No Diabetes 0.80 0.79 0.79 99
Diabetes 0.62 0.64 0.63 55
accuracy 0.73 154
macro avg 0.71 0.71 0.71 154
weighted avg 0.73 0.73 0.73 154
In [ ]: